From deba4c0c91696b431bd95e0536d32c621b36a57b Mon Sep 17 00:00:00 2001
From: oliskoli
Date: Thu, 4 Sep 2008 20:49:39 +0000
Subject: [PATCH] gbfile: Add support for reading UNICODE text files (using gbfgetstr).

---
Review notes (everything between "---" and the first "diff --git" line is
ignored when the patch is applied):
 - gbfgetucs2str, gbfunicode: the two bytes of a code unit are now read in
   separate statements. In "gbfgetc(f) | (gbfgetc(f) << 8)" the order of
   the two reads is unspecified, so the bytes could end up swapped.
 - gbfgetucs2str: the byte order is now applied while combining the bytes.
   The former "c = be_read16(&c)" worked on the memory of an int, so
   (assuming be_read16 reads two bytes at the given address) the result
   depended on the byte order of the host.
 - gbfgetucs2str: EOF after at least one character now ends the line. The
   old loop had no exit for that case.
 - NOTE(review): the copy this patch was restored from had lost all line
   breaks and indentation, and the header names behind the bare "#include"
   lines in the gbfile.h hunk. Indentation and blank context lines are
   reconstructed (apply with --ignore-whitespace if needed); the include
   names have to be filled in from the original commit. The post-image
   blob id on the gbfile.c "index" line predates the changes listed above.

 gbfile.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 gbfile.h |   8 +++++-
 2 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/gbfile.c b/gbfile.c
index 678093d84..73f76e7cb 100644
--- a/gbfile.c
+++ b/gbfile.c
@@ -257,7 +257,7 @@ gbfread(void *buf, const gbsize_t size, const gbsize_t members, gbfile *file)
 
 	/* Check for an incomplete READ */
 	if ((members == 1) && (size > 1) && (result > 0) && (result < (int)size))
-		fatal("%s: Unexpected end of files (EOF)!\n", file->module);
+		fatal("%s: Unexpected end of file (EOF)!\n", file->module);
 
 	result /= size;
 
@@ -750,6 +750,79 @@ gbfgetpstr(gbfile *file)
 	return result;
 }
 
+/*
+ * gbfgetucs2: Reads one 16-bit UCS-2 code unit from file, using the byte
+ *             order given by file->big_endian.
+ *             Returns EOF if the file ends before both bytes were read.
+ */
+
+static int
+gbfgetucs2(gbfile *file)
+{
+	int first, second;
+
+	/* Read the bytes in two separate statements: within one expression
+	   like "gbfgetc(f) | (gbfgetc(f) << 8)" the order of the two calls
+	   is unspecified, so the bytes could end up swapped. */
+	first = gbfgetc(file);
+	if (first == EOF) return EOF;
+	second = gbfgetc(file);
+	if (second == EOF) return EOF;
+
+	/* Combine arithmetically, so the result does not depend on the byte
+	   order of the host. */
+	if (file->big_endian) return (first << 8) | second;
+	return (second << 8) | first;
+}
+
+/*
+ * gbfgetucs2str: Reads one line from an UCS-2 encoded file and returns it
+ *                converted by cet_ucs4_to_utf8. The result lives in
+ *                file->line, so the caller must not free it.
+ *                A line ends with "\r\n" or at the end of the file.
+ *                Returns NULL if EOF is hit before anything was read.
+ */
+
+static char *
+gbfgetucs2str(gbfile *file)
+{
+	int len = 0;
+	char *result = file->line;
+
+	for (;;) {
+		char buff[8];
+		int clen;
+		int c = gbfgetucs2(file);
+
+		if (c == EOF) {
+			if (len == 0) return NULL;
+			break;	/* last line comes without a line break */
+		}
+
+		if (c == '\r') {
+			c = gbfgetucs2(file);
+			if (c != '\n')
+				fatal("%s: Invalid unicode (UCS-2/%s endian) line break!\n",
+					file->module,
+					file->big_endian ? "Big" : "Little");
+			break;
+		}
+
+		clen = cet_ucs4_to_utf8(buff, sizeof(buff), c);
+
+		/* keep room for the bytes of this character and the final '\0' */
+		if (len+clen >= file->linesz) {
+			file->linesz += 64;
+			result = file->line = xrealloc(file->line, file->linesz + 1);
+		}
+		memcpy(&result[len], buff, clen);
+		len += clen;
+	}
+	result[len] = '\0';	// terminate resulting string
+
+	return result;
+}
+
 /*
  * gbfgetstr: Reads a string from file (util any type of line-breaks or eof or error)
  *            except xfree and free you can do all possible things with the result
@@ -761,6 +834,8 @@ gbfgetstr(gbfile *file)
 	int len = 0;
 	char *result = file->line;
 
+	if (file->unicode) return gbfgetucs2str(file);
+
 	for (;;) {
 		char c = gbfgetc(file);
 
@@ -887,4 +962,43 @@ gbfputpstr(const char *s, gbfile *file)
 	return (len + 1);
 }
 
+/*
+ * gbfunicode: Checks (once per file) whether the file starts with an UCS-2
+ *             byte order mark. If so, file->unicode and file->big_endian
+ *             are set and gbfgetstr reads the file through gbfgetucs2str.
+ *             Returns 1 for an unicode file, otherwise 0.
+ */
+
+int
+gbfunicode(gbfile *file)
+{
+	if (! file->unicode_checked) {
+		int lo, hi, c;
+		size_t pos;
+
+		file->unicode_checked = 1;
+
+		pos = gbftell(file);
+		gbfrewind(file);
+
+		/* two statements, because the evaluation order of the operands
+		   of '|' is unspecified */
+		lo = gbfgetc(file);
+		hi = gbfgetc(file);
+		c = ((lo == EOF) || (hi == EOF)) ? EOF : (lo | (hi << 8));
+
+		if (c == 0xFEFF) file->big_endian = 0;
+		else if (c == 0xFFFE) file->big_endian = 1;
+		else {
+			gbfseek(file, pos, SEEK_SET);
+			return 0;
+		}
+		file->unicode = 1;
+		/* when called at offset 0 the byte order mark stays consumed */
+		if (pos != 0) gbfseek(file, pos, SEEK_SET);
+	}
+	return file->unicode;
+}
+
+
 /* Thats all, sorry. */
diff --git a/gbfile.h b/gbfile.h
index 545ebfc68..662282ad9 100644
--- a/gbfile.h
+++ b/gbfile.h
@@ -23,9 +23,12 @@
 #ifndef GBFILE_H
 #define GBFILE_H
 
+#include
+#include
+#include
 #include "config.h"
 #include "defs.h"
-#include
+#include "cet.h"
 
 typedef struct gbfile_s {
 #ifdef DEBUG_MEM
@@ -48,6 +51,8 @@ typedef struct gbfile_s {
 	unsigned char big_endian:1;
 	unsigned char binary:1;
 	unsigned char gzapi:1;
+	unsigned char unicode:1;
+	unsigned char unicode_checked:1;
 } gbfile;
 
 
@@ -95,4 +100,5 @@ int gbfputflt(const float f, gbfile *file);	// write a float value
 int gbfputcstr(const char *s, gbfile *file);	// write string including '\0'
 int gbfputpstr(const char *s, gbfile *file);	// write as pascal string
 
+int gbfunicode(gbfile *file);
 #endif
-- 
2.30.2